Data Analysis Report

Prepared By: Zahiruddin Zahidanishah

In [1]:
from IPython.display import display_html
def display_side_by_side(*args):
    html_str=''
    for df in args:
        html_str+=df.to_html()
    display_html(html_str.replace('table','table style="display:inline"'),raw=True)
In [2]:
from IPython.display import HTML

HTML('''<script>
code_show=true; 
function code_toggle() {
 if (code_show){
 $('div.input').hide();
 } else {
 $('div.input').show();
 }
 code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
Out[2]:

Water Consumption Report

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
#data = {'Month':['Jan', 'Feb'], 'Usage':[355,314], 'Amount':[107.05,85.55]}
#df = pd.DataFrame(data, columns=['Month', 'Usage', 'Amount'])
#df.to_excel('utilities.xlsx', sheet_name='Sheet1')
df1 = pd.read_excel('water_utils.xls', sheet_name='2019')
df2 = pd.read_excel('water_utils.xls', sheet_name='2020')
df3 = pd.read_excel('water_utils.xls', sheet_name='2021')
df4 = pd.read_excel('water_utils.xls', sheet_name='2022')

Introduction¶

This report reports shows the water consumption and amount payable for a typical double storey house in urban city located in Selangor, Malaysia. This report consists of data from year 2014 to date. This report will shows the relation between the consumption and payment for the water usage every month and every year.

The details of the house inventory and areas are as describe belows.

In [4]:
data = {'Items':['Type','Land Area (sq.m)','Build Up Area (sq.m)','Bedrooms','Bathrooms',
                 'Lighting Pts.','Power Pts.','Fan Pts.','AC Pts.','Water Taps'],
        'Description':['Double Storey Terrace House',121,180,4,3,22,14,4,3,7]}
df_house = pd.DataFrame(data,columns=['Items','Description'])
df_house.style.set_caption("Assets Details")                   
Out[4]:
Assets Details
  Items Description
0 Type Double Storey Terrace House
1 Land Area (sq.m) 121
2 Build Up Area (sq.m) 180
3 Bedrooms 4
4 Bathrooms 3
5 Lighting Pts. 22
6 Power Pts. 14
7 Fan Pts. 4
8 AC Pts. 3
9 Water Taps 7
In [5]:
data = {'Year':[2019,2020,2021,2022],
       'Total Usage (m3)':[df1['Usage'].sum(),df2['Usage'].sum(),df3['Usage'].sum(),df4['Usage'].sum()],
       'Total Cost (RM)':[df1['Cost'].sum(),df2['Cost'].sum(),df3['Cost'].sum(),df4['Cost'].sum()],
       'Average Usage (m3)':[df1['Usage'].mean(),df2['Usage'].mean(),df3['Usage'].mean(),df4['Usage'].mean()],
       'Average Cost (RM)':[df1['Cost'].mean(),df2['Cost'].mean(),df3['Cost'].mean(),df4['Cost'].mean()],
       'Max. Usage (m3)':[df1['Usage'].max(),df2['Usage'].max(),df3['Usage'].max(),df4['Usage'].max()],
       'Max. Cost (RM)':[df1['Cost'].max(),df2['Cost'].max(),df3['Cost'].max(),df4['Cost'].max()]}
df = pd.DataFrame(data,columns=['Year','Total Usage (m3)','Average Usage (m3)','Total Cost (RM)','Average Cost (RM)',
                               'Max. Usage (m3)','Max. Cost (RM)']).astype(int)
In [6]:
df['Water Rate (RM/m3)'] = df['Total Cost (RM)']/df['Total Usage (m3)']
df = df.reindex(['Year',
                 'Total Usage (m3)',
                 'Total Cost (RM)',
                 'Water Rate (RM/m3)',
                 'Average Usage (m3)',
                 'Average Cost (RM)',
                 'Max. Usage (m3)',
                 'Max. Cost (RM)'], axis=1)
In [7]:
df.style.set_caption("Summary of Annual Water Supply Usage and Cost").set_table_styles([{
    'selector': 'caption',
    'props': [
        ('color', 'black'),
        ('font-size', '26px'),
        ("text-align", "center"),
        ('text-decoration', 'underline'),
        ('font-family','Arial'),
        ('text-shadow', '2px 2px 5px grey')
    ]},(dict
        (selector='th',props=[('text-align',
                               'left')]))]).format(
    {'Water Rate (RM/m3)':'{:,.2f}'}
).set_properties(subset=['Year'],**{'text-align': 'left'}).hide_index()
Out[7]:
Summary of Annual Water Supply Usage and Cost
Year Total Usage (m3) Total Cost (RM) Water Rate (RM/m3) Average Usage (m3) Average Cost (RM) Max. Usage (m3) Max. Cost (RM)
2019 77 44 0.57 6 3 13 7
2020 219 129 0.59 18 10 27 18
2021 341 203 0.60 28 16 35 21
2022 100 20 0.20 25 5 27 7
In [8]:
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{"type": "xy"},{"type": "domain"}]],
    subplot_titles=('<b>Total Usage (m3)</b>','<b>Total Cost (RM)</b>')
)

fig.add_trace(go.Bar(x = df['Year'], y = df['Total Usage (m3)'],
                     name='Total Usage (m3)'),row=1, col=1)

fig.add_trace(go.Pie(values=df['Total Cost (RM)'],
                     labels=df['Year'],
                     textinfo='label+percent'),row=1, col=2)

fig.update_layout(height=500, showlegend=False,
                 title_text='Water Usage & Consumption', title_x=0.5)
fig.update_annotations(font=dict(family="Helvetica", size=12))
fig.update_layout(font=dict(family="Helvetica", size=12))

fig.show()
In [9]:
bar_plots = [
    go.Bar(x = df['Year'], y = df['Total Usage (m3)'],name='Total Usage (m3)'),
    go.Bar(x = df['Year'], y = df['Total Cost (RM)'],name='Total Cost (RM)')
           ]
layout = go.Layout(
title=go.layout.Title(text='Summary of Annual Water Supply Usage & Cost',x=0.5),
yaxis_title='Usage (m3)',xaxis_tickmode='array')
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()

Detail of Water Consumption and Amount for Respective Year¶

In [10]:
bar_plots = [
    go.Bar(x = df1[2019], y = df1['Usage'],name=2019),
    go.Bar(x = df2[2020], y = df2['Usage'],name=2020),
    go.Bar(x = df3[2021], y = df3['Usage'],name=2021),
    go.Bar(x = df4[2022], y = df4['Usage'],name=2022)
           ]
layout = go.Layout(
title=go.layout.Title(text='Details of Annual Water Supply Usage',x=0.5),
yaxis_title='Usage (m3)',xaxis_tickmode='array')
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()
In [11]:
bar_plots = [
    go.Bar(x = df1[2019], y = df1['Cost'],name=2019),
    go.Bar(x = df2[2020], y = df2['Cost'],name=2020),
    go.Bar(x = df3[2021], y = df3['Cost'],name=2021),
    go.Bar(x = df4[2022], y = df4['Cost'],name=2022)
           ]
layout = go.Layout(
title=go.layout.Title(text='Details of Annual Water Supply Cost',x=0.5),
yaxis_title='Cost (RM)',xaxis_tickmode='array')
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()

Detail of Usage (m3) and Cost (RM) for Every Years¶

In [12]:
df1['Usage/Person (m3/p)'] = (df1['Usage']/5).round(2)
df2['Usage/Person (m3/p)'] = (df2['Usage']/5).round(2)
df3['Usage/Person (m3/p)'] = (df3['Usage']/5).round(2)
df4['Usage/Person (m3/p)'] = (df4['Usage']/5).round(2)
display_side_by_side(df1, df2, df3, df4)
2019 Usage Cost Usage/Person (m3/p)
0 Jan 4.57 2.6049 0.91
1 Feb 4.57 2.6049 0.91
2 Mar 4.57 2.6049 0.91
3 Apr 4.57 2.6049 0.91
4 May 4.57 2.6049 0.91
5 Jun 4.57 2.6049 0.91
6 Jul 4.57 2.6049 0.91
7 Aug 13.00 7.4100 2.60
8 Sep 6.00 3.4200 1.20
9 Oct 8.00 4.5600 1.60
10 Nov 10.00 5.7000 2.00
11 Dec 9.00 5.1300 1.80
2020 Usage Cost Usage/Person (m3/p)
0 Jan 8 0.00 1.6
1 Feb 8 0.00 1.6
2 Mar 8 6.00 1.6
3 Apr 9 6.00 1.8
4 May 20 11.40 4.0
5 Jun 21 12.45 4.2
6 Jul 24 15.50 4.8
7 Aug 26 17.60 5.2
8 Sep 24 15.50 4.8
9 Oct 24 15.50 4.8
10 Nov 20 11.40 4.0
11 Dec 27 18.60 5.4
2021 Usage Cost Usage/Person (m3/p)
0 Jan 28 19.65 5.6
1 Feb 27 18.60 5.4
2 Mar 26 17.60 5.2
3 Apr 26 17.60 5.2
4 May 28 19.65 5.6
5 Jun 30 21.70 6.0
6 Jul 25 16.55 5.0
7 Aug 27 18.60 5.4
8 Sep 27 18.61 5.4
9 Oct 35 15.45 7.0
10 Nov 32 12.36 6.4
11 Dec 30 7.55 6.0
2022 Usage Cost Usage/Person (m3/p)
0 Jan 24.0 4.12 4.8
1 Feb 22.0 2.06 4.4
2 Mar 27.0 7.20 5.4
3 Apr 27.0 7.20 5.4
4 May NaN NaN NaN
5 Jun NaN NaN NaN
6 Jul NaN NaN NaN
7 Aug NaN NaN NaN
8 Sep NaN NaN NaN
9 Oct NaN NaN NaN
10 Nov NaN NaN NaN
11 Dec NaN NaN NaN
In [13]:
bar_plots = [
    go.Bar(x = df1[2019], y = df1['Usage/Person (m3/p)'],name=2019),
    go.Bar(x = df2[2020], y = df2['Usage/Person (m3/p)'],name=2020),
    go.Bar(x = df3[2021], y = df3['Usage/Person (m3/p)'],name=2021),
    go.Bar(x = df4[2022], y = df4['Usage/Person (m3/p)'],name=2022)
           ]
layout = go.Layout(
title=go.layout.Title(text='Monthly Usage Per Person (m3/person)',x=0.5),
yaxis_title='Usage (m3/person)',xaxis_tickmode='array')
fig = go.Figure(data=bar_plots, layout=layout)
fig.show()
In [14]:
px.bar(df1, x = 2019, y = 'Usage', title = 'Monthly Water Supply Usage', color=2019, color_continuous_scale=px.colors.sequential.Viridis)
In [15]:
px.bar(df2, x = 2020, y = 'Usage', title = 'Monthly Water Supply Usage', color=2020, color_continuous_scale=px.colors.sequential.Viridis)
In [16]:
px.bar(df3, x = 2021, y = 'Usage', title = 'Monthly Water Supply Usage', color=2021, color_continuous_scale=px.colors.sequential.Viridis)
In [17]:
px.bar(df4, x = 2022, y = 'Usage', title = 'Monthly Water Supply Usage', color=2022, color_continuous_scale=px.colors.sequential.Viridis)
In [18]:
px.bar(df1, x = 2019, y = 'Cost', title = 'Monthly Water Supply Cost', color=2019, color_continuous_scale=px.colors.sequential.Viridis)
In [19]:
px.bar(df2, x = 2020, y = 'Cost', title = 'Monthly Water Supply Cost', color=2020, color_continuous_scale=px.colors.sequential.Viridis)
In [20]:
px.bar(df3, x = 2021, y = 'Cost', title = 'Monthly Water Supply Cost', color=2021, color_continuous_scale=px.colors.sequential.Viridis)
In [21]:
px.bar(df4, x = 2022, y = 'Cost', title = 'Monthly Water Supply Cost', color=2022, color_continuous_scale=px.colors.sequential.Viridis)

Note: This report is prepared by Zahiruddin Zahidanishah. This report is only for educational purposed and shall not be used for any commercial purposed.

In [22]:
%%capture
# export as HTML
!jupyter nbconvert --to html --no-input --no-prompt AXA.ipynb
In [ ]: